import os, sys
import requests
import re
from lxml import etree

# Put the current working directory on the import path before importing
# `proxy.proxy` below. Presumably `proxy` is a project-local package, so the
# import depends on the directory the script is launched from - TODO confirm.
pwd = os.getcwd()
sys.path.append(pwd)
from proxy.proxy import ValidIp

# HTTP headers sent with every page request; only a desktop Chrome
# user-agent string is set.
headers = {
    'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Safari/537.36'
}

# Root of the site being crawled; relative hrefs taken from pages are
# appended to it.
base_url = 'https://product.360che.com'

# Listing-page path templates; `{}` is filled with the page number.
# The names are pinyin - presumably "on sale", "discontinued" and
# "upcoming" respectively (TODO confirm against the site).
# NOTE(review): these three names are not referenced in the visible part of
# this file; the __main__ block repeats the same literals.
zaishou_url = '/{}.html'
tingshou_url = '/e4/{}.html'
jijiangshangshi_url = '/e3/{}.html'

# Base address of the API that post_kache_data() posts to.
host = 'http://192.168.50.149:4011'

# Send data to the API endpoint
def post_kache_data(data, timeout=30):
    """POST ``data`` to ``host + '/api/v1/kachezhijia/xingxi'``.

    Args:
        data: Payload handed to ``requests.post`` as its ``data`` argument.
        timeout: Seconds passed to requests as ``timeout``. Without one,
            a server that never answers would block the call indefinitely.

    Returns:
        The response body as text. The HTTP status code is not checked.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the timeout elapses.
    """
    response = requests.post(
        host + '/api/v1/kachezhijia/xingxi',
        data=data,
        timeout=timeout,
    )
    return response.text

# Get the next page
def get_next_page(response):
    """Return the URL of the next listing page, or ``None`` if there is none.

    Args:
        response: Object whose ``text`` attribute holds the HTML of a
            listing page (e.g. a ``requests`` response).

    Returns:
        ``base_url`` followed by the href of the LAST ``<a class="pages-wd">``
        link found under ``#tractor_price``, or ``None`` when the page
        contains no such link (previously this case raised ``IndexError``).
    """
    html = etree.HTML(response.text)
    pager_hrefs = html.xpath('//*[@id="tractor_price"]/div/ul/a[@class="pages-wd"]/@href')
    if not pager_hrefs:
        # No pager link at all: report "no next page" instead of crashing.
        return None
    # NOTE(review): the last matching link is assumed to be the "next" link;
    # verify what it points to on the final page of a listing.
    return base_url + pager_hrefs[-1]

# Fetch car parameters
def get_car_info(url, sales_status, proxies, timeout=30):
    """Fetch a parameter page and record one parameter dict per car column.

    For every car column on the page a dict mapping parameter label to value
    is built, appended as one line to ``canshu1.csv`` in the current working
    directory, and printed wrapped as ``{'canshu': ...}``.

    Args:
        url: Address of the parameter page.
        sales_status: Stored (as a string) under the ``sales_status`` key of
            every dict produced.
        proxies: Forwarded to ``requests.get`` as ``proxies``.
        timeout: Seconds passed to requests as ``timeout`` so a stalled
            connection cannot hang the crawl.

    Raises:
        IndexError: If a car column lacks its data-id, name or price cell,
            or if a column yields more values than there are labels.
        requests.exceptions.RequestException: On network failure or timeout.
    """
    response = requests.get(url, headers=headers, proxies=proxies, timeout=timeout)
    html = etree.HTML(response.text)

    # Every parameter query starts from the same container element.
    detail = '//*[@id="mybody"]/div[@class="wrapper"]/div[@class="parameter-detail"]'

    car_sum = len(html.xpath('//*[@id="fixed_top"]/th'))
    param_sum = len(html.xpath(detail + '/table/tbody/tr/td[1]'))
    print(car_sum, param_sum)

    # Four fixed labels followed by the labels read from the first column of
    # the first table. The last character of each label is dropped when it
    # becomes a key (the ':' on the fixed labels; page labels presumably end
    # in a colon as well - TODO confirm).
    list_param_name = ['data_id:', 'sales_status:', '车名:', '厂商指导价:']
    list_param_name.extend(html.xpath(detail + '/table[1]/tbody/tr/td[1]/text()'))

    # Column 1 is where the labels are read from, so car columns start at 2.
    for i in range(2, car_sum + 1):
        head_cell = detail + '/table/thead/tr[1]/th[{}]'.format(i)
        data_id = html.xpath(
            head_cell + '/div/div[@class="tool"]/span[@class="compare"]/@data-id'
        )[0].strip()
        car_name = html.xpath(head_cell + '/div/h5/a/text()')[0].strip()
        changshangzhidaojia = html.xpath(
            detail + '/table/thead/tr[2]/td[{}]/text()'.format(i)
        )[0].strip()

        list_param = [data_id, str(sales_status), car_name, changshangzhidaojia]
        list_param.extend(
            text.strip()
            for text in html.xpath(detail + '/table/tbody/tr/td[{}]/div/text()[1]'.format(i))
        )

        # Pair values with labels by position. Indexing (rather than zip)
        # keeps the IndexError when there are more values than labels, so a
        # misaligned page is not silently truncated.
        canshu = {
            list_param_name[index].strip()[:-1]: value
            for index, value in enumerate(list_param)
        }
        data = {'canshu': canshu}

        # Each line is the dict's Python repr, not comma-separated values,
        # despite the file extension. The encoding is explicit so the
        # Chinese text does not depend on the platform default.
        with open('canshu1.csv', 'a', encoding='utf-8') as f:
            f.write(str(canshu) + '\n')
        print(data)
        # Upload to the API is currently disabled:
        # post_kache_data(data)

# Download
def download_info(status_url, sales_status, timeout=30):
    """Crawl the listing pages for one sales status and process every car.

    Starting at page 1 of ``status_url``, each listing page is fetched, every
    car link on it is handed to ``get_car_info``, and the pager is followed
    until a page has no cars, there is no next-page link, or the next link
    leads back to a page that was already crawled.

    Args:
        status_url: Path template containing one ``{}`` placeholder for the
            page number, appended to ``base_url``.
        sales_status: Passed through unchanged to ``get_car_info``.
        timeout: Seconds passed to requests as ``timeout`` for the
            listing-page fetches.
    """
    proxies = ValidIp()
    url = (base_url + status_url).format(1)
    visited = set()

    # Stop on a pager link that points at an already-crawled page; following
    # it would repeat the same pages forever.
    while url and url not in visited:
        visited.add(url)
        response = requests.get(url, headers=headers, proxies=proxies, timeout=timeout)
        html = etree.HTML(response.text)
        list_car_href = html.xpath('//*[@id="productList"]/li/div[@class="content"]/a/@href')
        if not list_car_href:
            break

        for car_href in list_car_href:
            # Swap the detail page for the parameter page.
            car_url = base_url + car_href.replace('index', 'param')
            get_car_info(car_url, sales_status, proxies)

        try:
            url = get_next_page(response)
        except IndexError:
            # The page has no pager link: this was the last page.
            break
        if not url:
            break
        print(url)

if __name__ == '__main__':
    # Listing-path templates; a template's position in the tuple is also the
    # sales_status value handed to download_info.
    status_templates = ('/{}.html', '/e4/{}.html', '/e3/{}.html')
    # The slice selects which statuses are crawled: currently only index 2.
    for sales_status, status_url in list(enumerate(status_templates))[2:3]:
        download_info(status_url, sales_status)


